Slicing


In [1]:
import pandas as pd

In [6]:
# Load the direct-marketing CSV; the path is relative to the notebook's
# working directory.
df = pd.read_csv(filepath_or_buffer='Datasets/direct_marketing.csv')

In [7]:
# Display the loaded frame. The rendering below is abbreviated (note the
# `...` row) and reports 64000 rows × 13 columns.
df


Out[7]:
recency history_segment history mens womens zip_code newbie channel segment visit conversion spend DM_category
0 10 2) $100 - $200 142.44 1 0 Surburban 0 Phone Womens E-Mail 0 0 0.0 4
1 6 3) $200 - $350 329.08 1 1 Rural 1 Web No E-Mail 0 0 0.0 11
2 7 2) $100 - $200 180.65 0 1 Surburban 1 Web Womens E-Mail 0 0 0.0 1
3 9 5) $500 - $750 675.83 1 0 Rural 1 Web Mens E-Mail 0 0 0.0 2
4 2 1) $0 - $100 45.34 1 0 Urban 0 Web Womens E-Mail 0 0 0.0 4
5 6 2) $100 - $200 134.83 0 1 Surburban 0 Phone Womens E-Mail 1 0 0.0 1
6 9 3) $200 - $350 280.20 1 0 Surburban 1 Phone Womens E-Mail 0 0 0.0 4
7 9 1) $0 - $100 46.42 0 1 Urban 0 Phone Womens E-Mail 0 0 0.0 1
8 9 5) $500 - $750 675.07 1 1 Rural 1 Phone Mens E-Mail 0 0 0.0 5
9 10 1) $0 - $100 32.84 0 1 Urban 1 Web Womens E-Mail 0 0 0.0 1
10 7 5) $500 - $750 548.91 0 1 Urban 1 Phone Womens E-Mail 1 0 0.0 1
11 1 3) $200 - $350 211.45 0 1 Urban 1 Phone Womens E-Mail 0 0 0.0 1
12 5 5) $500 - $750 642.90 0 1 Surburban 1 Multichannel Womens E-Mail 0 0 0.0 1
13 2 2) $100 - $200 101.64 0 1 Urban 0 Web Mens E-Mail 1 0 0.0 3
14 4 3) $200 - $350 241.42 0 1 Rural 1 Multichannel No E-Mail 0 0 0.0 5
15 3 1) $0 - $100 58.13 1 0 Urban 1 Web No E-Mail 1 0 0.0 6
16 5 1) $0 - $100 29.99 1 0 Surburban 0 Phone Mens E-Mail 0 0 0.0 2
17 9 2) $100 - $200 112.35 1 0 Rural 0 Web Mens E-Mail 0 0 0.0 2
18 11 3) $200 - $350 219.04 1 1 Surburban 0 Phone Mens E-Mail 0 0 0.0 5
19 5 6) $750 - $1,000 828.42 1 0 Surburban 1 Multichannel Mens E-Mail 0 0 0.0 2
20 9 1) $0 - $100 29.99 0 1 Surburban 1 Phone No E-Mail 0 0 0.0 5
21 11 2) $100 - $200 182.32 1 0 Surburban 0 Phone Mens E-Mail 0 0 0.0 2
22 2 2) $100 - $200 118.40 1 0 Surburban 0 Web Mens E-Mail 1 0 0.0 2
23 2 1) $0 - $100 29.99 0 1 Urban 1 Phone No E-Mail 0 0 0.0 5
24 4 1) $0 - $100 78.24 1 0 Surburban 0 Web No E-Mail 0 0 0.0 6
25 6 2) $100 - $200 139.87 0 1 Rural 1 Web Mens E-Mail 0 0 0.0 3
26 12 2) $100 - $200 115.39 0 1 Surburban 0 Phone Mens E-Mail 1 0 0.0 3
27 6 2) $100 - $200 162.98 0 1 Surburban 0 Web Mens E-Mail 0 0 0.0 3
28 7 4) $350 - $500 435.73 0 1 Urban 1 Web No E-Mail 0 0 0.0 5
29 2 3) $200 - $350 203.35 1 0 Rural 0 Web No E-Mail 0 0 0.0 6
... ... ... ... ... ... ... ... ... ... ... ... ... ...
63970 4 2) $100 - $200 191.15 0 1 Surburban 1 Web Womens E-Mail 0 0 0.0 1
63971 5 5) $500 - $750 549.87 0 1 Surburban 1 Phone Womens E-Mail 0 0 0.0 1
63972 3 5) $500 - $750 554.97 0 1 Surburban 1 Web No E-Mail 0 0 0.0 5
63973 8 4) $350 - $500 471.80 1 0 Surburban 1 Multichannel Womens E-Mail 0 0 0.0 4
63974 8 1) $0 - $100 73.65 0 1 Surburban 0 Phone Mens E-Mail 0 0 0.0 3
63975 10 6) $750 - $1,000 883.92 1 0 Urban 1 Phone No E-Mail 0 0 0.0 6
63976 1 5) $500 - $750 710.72 1 1 Urban 1 Phone No E-Mail 0 0 0.0 11
63977 8 1) $0 - $100 29.99 0 1 Surburban 0 Web No E-Mail 1 0 0.0 5
63978 10 2) $100 - $200 102.01 0 1 Urban 1 Phone Mens E-Mail 0 0 0.0 3
63979 10 2) $100 - $200 168.21 0 1 Surburban 0 Phone No E-Mail 0 0 0.0 5
63980 3 4) $350 - $500 487.10 0 1 Surburban 1 Phone No E-Mail 0 0 0.0 5
63981 4 2) $100 - $200 125.53 0 1 Rural 1 Phone No E-Mail 0 0 0.0 5
63982 5 1) $0 - $100 29.99 1 0 Urban 1 Phone Mens E-Mail 0 0 0.0 2
63983 2 1) $0 - $100 83.03 0 1 Urban 0 Phone No E-Mail 0 0 0.0 5
63984 2 3) $200 - $350 209.51 0 1 Urban 1 Web Womens E-Mail 0 0 0.0 1
63985 9 1) $0 - $100 29.99 1 0 Urban 0 Phone Mens E-Mail 1 0 0.0 2
63986 9 1) $0 - $100 35.26 0 1 Urban 1 Web Womens E-Mail 0 0 0.0 1
63987 1 1) $0 - $100 79.70 1 0 Surburban 1 Web No E-Mail 0 0 0.0 6
63988 6 1) $0 - $100 32.98 1 0 Surburban 0 Web Mens E-Mail 0 0 0.0 2
63989 10 3) $200 - $350 304.30 1 1 Surburban 0 Web Womens E-Mail 1 0 0.0 5
63990 6 1) $0 - $100 80.02 0 1 Surburban 0 Phone No E-Mail 0 0 0.0 5
63991 1 3) $200 - $350 306.10 1 0 Surburban 1 Phone Womens E-Mail 0 0 0.0 4
63992 1 5) $500 - $750 519.69 1 1 Urban 1 Phone Mens E-Mail 0 0 0.0 5
63993 4 4) $350 - $500 374.07 0 1 Surburban 0 Phone Womens E-Mail 0 0 0.0 1
63994 7 1) $0 - $100 86.46 0 1 Urban 0 Web Mens E-Mail 0 0 0.0 3
63995 10 2) $100 - $200 105.54 1 0 Urban 0 Web Mens E-Mail 0 0 0.0 2
63996 5 1) $0 - $100 38.91 0 1 Urban 1 Phone Mens E-Mail 0 0 0.0 3
63997 6 1) $0 - $100 29.99 1 0 Urban 1 Phone Mens E-Mail 0 0 0.0 2
63998 1 5) $500 - $750 552.94 1 0 Surburban 1 Multichannel Womens E-Mail 0 0 0.0 4
63999 1 4) $350 - $500 472.82 0 1 Surburban 0 Web Mens E-Mail 0 0 0.0 3

64000 rows × 13 columns


In [8]:
# Attribute-style column access: returns the 'recency' column as a Series
# (the output ends with `Name: recency, dtype: int64`).
df.recency


Out[8]:
0        10
1         6
2         7
3         9
4         2
5         6
6         9
7         9
8         9
9        10
10        7
11        1
12        5
13        2
14        4
15        3
16        5
17        9
18       11
19        5
20        9
21       11
22        2
23        2
24        4
25        6
26       12
27        6
28        7
29        2
         ..
63970     4
63971     5
63972     3
63973     8
63974     8
63975    10
63976     1
63977     8
63978    10
63979    10
63980     3
63981     4
63982     5
63983     2
63984     2
63985     9
63986     9
63987     1
63988     6
63989    10
63990     6
63991     1
63992     1
63993     4
63994     7
63995    10
63996     5
63997     6
63998     1
63999     1
Name: recency, dtype: int64

In [9]:
# Bracket access with a single column label: yields the same Series as the
# attribute form `df.recency`.
df['recency']


Out[9]:
0        10
1         6
2         7
3         9
4         2
5         6
6         9
7         9
8         9
9        10
10        7
11        1
12        5
13        2
14        4
15        3
16        5
17        9
18       11
19        5
20        9
21       11
22        2
23        2
24        4
25        6
26       12
27        6
28        7
29        2
         ..
63970     4
63971     5
63972     3
63973     8
63974     8
63975    10
63976     1
63977     8
63978    10
63979    10
63980     3
63981     4
63982     5
63983     2
63984     2
63985     9
63986     9
63987     1
63988     6
63989    10
63990     6
63991     1
63992     1
63993     4
63994     7
63995    10
63996     5
63997     6
63998     1
63999     1
Name: recency, dtype: int64

In [10]:
# Label-based indexer: every row (`:`) and the single column labelled
# 'recency'. A scalar column label gives back a Series.
df.loc[:, 'recency']


Out[10]:
0        10
1         6
2         7
3         9
4         2
5         6
6         9
7         9
8         9
9        10
10        7
11        1
12        5
13        2
14        4
15        3
16        5
17        9
18       11
19        5
20        9
21       11
22        2
23        2
24        4
25        6
26       12
27        6
28        7
29        2
         ..
63970     4
63971     5
63972     3
63973     8
63974     8
63975    10
63976     1
63977     8
63978    10
63979    10
63980     3
63981     4
63982     5
63983     2
63984     2
63985     9
63986     9
63987     1
63988     6
63989    10
63990     6
63991     1
63992     1
63993     4
63994     7
63995    10
63996     5
63997     6
63998     1
63999     1
Name: recency, dtype: int64

In [11]:
# Position-based indexer: every row and the column at position 0, which is
# 'recency' in this frame (see `Name: recency` in the output).
df.iloc[:, 0]


Out[11]:
0        10
1         6
2         7
3         9
4         2
5         6
6         9
7         9
8         9
9        10
10        7
11        1
12        5
13        2
14        4
15        3
16        5
17        9
18       11
19        5
20        9
21       11
22        2
23        2
24        4
25        6
26       12
27        6
28        7
29        2
         ..
63970     4
63971     5
63972     3
63973     8
63974     8
63975    10
63976     1
63977     8
63978    10
63979    10
63980     3
63981     4
63982     5
63983     2
63984     2
63985     9
63986     9
63987     1
63988     6
63989    10
63990     6
63991     1
63992     1
63993     4
63994     7
63995    10
63996     5
63997     6
63998     1
63999     1
Name: recency, dtype: int64

In [13]:
# The original cell used `df.ix[:, 0]`. `.ix` was deprecated in pandas 0.20
# and removed in pandas 1.0, so it raises AttributeError on current versions.
# Because this frame's column labels are strings, `.ix` resolved the integer 0
# by position; `.iloc` expresses that positional lookup explicitly.
df.iloc[:, 0]


Out[13]:
0        10
1         6
2         7
3         9
4         2
5         6
6         9
7         9
8         9
9        10
10        7
11        1
12        5
13        2
14        4
15        3
16        5
17        9
18       11
19        5
20        9
21       11
22        2
23        2
24        4
25        6
26       12
27        6
28        7
29        2
         ..
63970     4
63971     5
63972     3
63973     8
63974     8
63975    10
63976     1
63977     8
63978    10
63979    10
63980     3
63981     4
63982     5
63983     2
63984     2
63985     9
63986     9
63987     1
63988     6
63989    10
63990     6
63991     1
63992     1
63993     4
63994     7
63995    10
63996     5
63997     6
63998     1
63999     1
Name: recency, dtype: int64

In [14]:
# Passing a *list* of labels (note the double brackets) keeps the result a
# DataFrame rather than a Series; the output reports 64000 rows × 1 columns.
df[['recency']]


Out[14]:
recency
0 10
1 6
2 7
3 9
4 2
5 6
6 9
7 9
8 9
9 10
10 7
11 1
12 5
13 2
14 4
15 3
16 5
17 9
18 11
19 5
20 9
21 11
22 2
23 2
24 4
25 6
26 12
27 6
28 7
29 2
... ...
63970 4
63971 5
63972 3
63973 8
63974 8
63975 10
63976 1
63977 8
63978 10
63979 10
63980 3
63981 4
63982 5
63983 2
63984 2
63985 9
63986 9
63987 1
63988 6
63989 10
63990 6
63991 1
63992 1
63993 4
63994 7
63995 10
63996 5
63997 6
63998 1
63999 1

64000 rows × 1 columns


In [15]:
# Same idea with the label-based indexer: a list of column labels returns a
# one-column DataFrame (64000 rows × 1 columns) instead of a Series.
df.loc[:, ['recency']]


Out[15]:
recency
0 10
1 6
2 7
3 9
4 2
5 6
6 9
7 9
8 9
9 10
10 7
11 1
12 5
13 2
14 4
15 3
16 5
17 9
18 11
19 5
20 9
21 11
22 2
23 2
24 4
25 6
26 12
27 6
28 7
29 2
... ...
63970 4
63971 5
63972 3
63973 8
63974 8
63975 10
63976 1
63977 8
63978 10
63979 10
63980 3
63981 4
63982 5
63983 2
63984 2
63985 9
63986 9
63987 1
63988 6
63989 10
63990 6
63991 1
63992 1
63993 4
63994 7
63995 10
63996 5
63997 6
63998 1
63999 1

64000 rows × 1 columns


In [17]:
# A list of integer positions selects several columns at once; positions 0
# and 1 are 'recency' and 'history_segment' in this frame.
df.iloc[:, [0,1]]


Out[17]:
recency history_segment
0 10 2) $100 - $200
1 6 3) $200 - $350
2 7 2) $100 - $200
3 9 5) $500 - $750
4 2 1) $0 - $100
5 6 2) $100 - $200
6 9 3) $200 - $350
7 9 1) $0 - $100
8 9 5) $500 - $750
9 10 1) $0 - $100
10 7 5) $500 - $750
11 1 3) $200 - $350
12 5 5) $500 - $750
13 2 2) $100 - $200
14 4 3) $200 - $350
15 3 1) $0 - $100
16 5 1) $0 - $100
17 9 2) $100 - $200
18 11 3) $200 - $350
19 5 6) $750 - $1,000
20 9 1) $0 - $100
21 11 2) $100 - $200
22 2 2) $100 - $200
23 2 1) $0 - $100
24 4 1) $0 - $100
25 6 2) $100 - $200
26 12 2) $100 - $200
27 6 2) $100 - $200
28 7 4) $350 - $500
29 2 3) $200 - $350
... ... ...
63970 4 2) $100 - $200
63971 5 5) $500 - $750
63972 3 5) $500 - $750
63973 8 4) $350 - $500
63974 8 1) $0 - $100
63975 10 6) $750 - $1,000
63976 1 5) $500 - $750
63977 8 1) $0 - $100
63978 10 2) $100 - $200
63979 10 2) $100 - $200
63980 3 4) $350 - $500
63981 4 2) $100 - $200
63982 5 1) $0 - $100
63983 2 1) $0 - $100
63984 2 3) $200 - $350
63985 9 1) $0 - $100
63986 9 1) $0 - $100
63987 1 1) $0 - $100
63988 6 1) $0 - $100
63989 10 3) $200 - $350
63990 6 1) $0 - $100
63991 1 3) $200 - $350
63992 1 5) $500 - $750
63993 4 4) $350 - $500
63994 7 1) $0 - $100
63995 10 2) $100 - $200
63996 5 1) $0 - $100
63997 6 1) $0 - $100
63998 1 5) $500 - $750
63999 1 4) $350 - $500

64000 rows × 2 columns


In [18]:
# Positional row slice: the stop of an `iloc` slice is exclusive, so 0:2
# returns rows 0 and 1 (all columns).
df.iloc[0:2, :]


Out[18]:
recency history_segment history mens womens zip_code newbie channel segment visit conversion spend DM_category
0 10 2) $100 - $200 142.44 1 0 Surburban 0 Phone Womens E-Mail 0 0 0.0 4
1 6 3) $200 - $350 329.08 1 1 Rural 1 Web No E-Mail 0 0 0.0 11

In [20]:
# Label-based row slice: unlike `iloc`, a `loc` slice *includes* its end
# label, so 0:1 also returns rows 0 and 1.
df.loc[0:1, :]


Out[20]:
recency history_segment history mens womens zip_code newbie channel segment visit conversion spend DM_category
0 10 2) $100 - $200 142.44 1 0 Surburban 0 Phone Womens E-Mail 0 0 0.0 4
1 6 3) $200 - $350 329.08 1 1 Rural 1 Web No E-Mail 0 0 0.0 11

In [19]:
# A plain slice inside `[]` selects rows, not columns; the stop is exclusive,
# so this returns rows 0 and 1, the same result as `df.iloc[0:2, :]`.
df[0:2]


Out[19]:
recency history_segment history mens womens zip_code newbie channel segment visit conversion spend DM_category
0 10 2) $100 - $200 142.44 1 0 Surburban 0 Phone Womens E-Mail 0 0 0.0 4
1 6 3) $200 - $350 329.08 1 1 Rural 1 Web No E-Mail 0 0 0.0 11

Dicing


In [21]:
# Element-wise comparison: produces a boolean Series (dtype: bool) with one
# True/False per row. This is a mask, not a filtered frame.
df.recency < 7


Out[21]:
0        False
1         True
2        False
3        False
4         True
5         True
6        False
7        False
8        False
9        False
10       False
11        True
12        True
13        True
14        True
15        True
16        True
17       False
18       False
19        True
20       False
21       False
22        True
23        True
24        True
25        True
26       False
27        True
28       False
29        True
         ...  
63970     True
63971     True
63972     True
63973    False
63974    False
63975    False
63976     True
63977    False
63978    False
63979    False
63980     True
63981     True
63982     True
63983     True
63984     True
63985    False
63986    False
63987     True
63988     True
63989    False
63990     True
63991     True
63992     True
63993     True
63994    False
63995    False
63996     True
63997     True
63998     True
63999     True
Name: recency, dtype: bool

In [22]:
# Boolean indexing: passing the mask to `[]` keeps only the rows where it is
# True (36585 of the 64000 rows, per the output).
df[ df.recency < 7 ]


Out[22]:
recency history_segment history mens womens zip_code newbie channel segment visit conversion spend DM_category
1 6 3) $200 - $350 329.08 1 1 Rural 1 Web No E-Mail 0 0 0.0 11
4 2 1) $0 - $100 45.34 1 0 Urban 0 Web Womens E-Mail 0 0 0.0 4
5 6 2) $100 - $200 134.83 0 1 Surburban 0 Phone Womens E-Mail 1 0 0.0 1
11 1 3) $200 - $350 211.45 0 1 Urban 1 Phone Womens E-Mail 0 0 0.0 1
12 5 5) $500 - $750 642.90 0 1 Surburban 1 Multichannel Womens E-Mail 0 0 0.0 1
13 2 2) $100 - $200 101.64 0 1 Urban 0 Web Mens E-Mail 1 0 0.0 3
14 4 3) $200 - $350 241.42 0 1 Rural 1 Multichannel No E-Mail 0 0 0.0 5
15 3 1) $0 - $100 58.13 1 0 Urban 1 Web No E-Mail 1 0 0.0 6
16 5 1) $0 - $100 29.99 1 0 Surburban 0 Phone Mens E-Mail 0 0 0.0 2
19 5 6) $750 - $1,000 828.42 1 0 Surburban 1 Multichannel Mens E-Mail 0 0 0.0 2
22 2 2) $100 - $200 118.40 1 0 Surburban 0 Web Mens E-Mail 1 0 0.0 2
23 2 1) $0 - $100 29.99 0 1 Urban 1 Phone No E-Mail 0 0 0.0 5
24 4 1) $0 - $100 78.24 1 0 Surburban 0 Web No E-Mail 0 0 0.0 6
25 6 2) $100 - $200 139.87 0 1 Rural 1 Web Mens E-Mail 0 0 0.0 3
27 6 2) $100 - $200 162.98 0 1 Surburban 0 Web Mens E-Mail 0 0 0.0 3
29 2 3) $200 - $350 203.35 1 0 Rural 0 Web No E-Mail 0 0 0.0 6
30 2 3) $200 - $350 237.53 0 1 Surburban 0 Phone Womens E-Mail 0 0 0.0 1
32 6 2) $100 - $200 128.01 0 1 Urban 0 Web Mens E-Mail 0 0 0.0 3
34 3 1) $0 - $100 29.99 1 0 Rural 0 Web Womens E-Mail 0 0 0.0 4
35 4 3) $200 - $350 218.72 0 1 Urban 0 Multichannel Womens E-Mail 0 0 0.0 1
36 1 5) $500 - $750 514.52 0 1 Surburban 1 Web Mens E-Mail 0 0 0.0 3
37 4 6) $750 - $1,000 766.47 1 1 Urban 1 Multichannel Mens E-Mail 0 0 0.0 5
41 3 1) $0 - $100 99.23 1 0 Rural 0 Web Mens E-Mail 1 0 0.0 2
43 2 4) $350 - $500 492.02 1 0 Surburban 0 Phone No E-Mail 0 0 0.0 6
44 1 1) $0 - $100 48.32 0 1 Urban 0 Web No E-Mail 0 0 0.0 5
46 2 4) $350 - $500 391.33 1 0 Surburban 0 Web No E-Mail 0 0 0.0 6
47 1 5) $500 - $750 729.70 1 1 Surburban 1 Web Mens E-Mail 0 0 0.0 5
48 3 2) $100 - $200 134.59 1 0 Urban 1 Phone Womens E-Mail 1 0 0.0 4
50 3 3) $200 - $350 203.30 0 1 Surburban 0 Web No E-Mail 0 0 0.0 5
55 6 1) $0 - $100 42.66 1 0 Surburban 0 Web No E-Mail 0 0 0.0 6
... ... ... ... ... ... ... ... ... ... ... ... ... ...
63949 5 1) $0 - $100 86.79 1 0 Rural 0 Phone No E-Mail 0 0 0.0 6
63950 1 1) $0 - $100 45.67 0 1 Surburban 1 Web Mens E-Mail 0 0 0.0 3
63953 5 2) $100 - $200 166.24 0 1 Urban 0 Phone No E-Mail 0 0 0.0 5
63954 2 1) $0 - $100 93.97 1 0 Urban 0 Web No E-Mail 1 0 0.0 6
63955 1 1) $0 - $100 29.99 1 0 Surburban 0 Phone Mens E-Mail 0 0 0.0 2
63960 1 3) $200 - $350 221.89 0 1 Surburban 1 Multichannel No E-Mail 0 0 0.0 5
63961 4 3) $200 - $350 337.36 1 0 Urban 0 Web Mens E-Mail 1 0 0.0 2
63964 2 6) $750 - $1,000 772.99 1 1 Surburban 1 Web Mens E-Mail 0 0 0.0 5
63966 4 2) $100 - $200 170.03 1 0 Surburban 0 Web Womens E-Mail 0 0 0.0 4
63967 5 1) $0 - $100 77.73 0 1 Urban 1 Phone No E-Mail 0 0 0.0 5
63969 3 1) $0 - $100 67.78 0 1 Surburban 0 Web Womens E-Mail 0 0 0.0 1
63970 4 2) $100 - $200 191.15 0 1 Surburban 1 Web Womens E-Mail 0 0 0.0 1
63971 5 5) $500 - $750 549.87 0 1 Surburban 1 Phone Womens E-Mail 0 0 0.0 1
63972 3 5) $500 - $750 554.97 0 1 Surburban 1 Web No E-Mail 0 0 0.0 5
63976 1 5) $500 - $750 710.72 1 1 Urban 1 Phone No E-Mail 0 0 0.0 11
63980 3 4) $350 - $500 487.10 0 1 Surburban 1 Phone No E-Mail 0 0 0.0 5
63981 4 2) $100 - $200 125.53 0 1 Rural 1 Phone No E-Mail 0 0 0.0 5
63982 5 1) $0 - $100 29.99 1 0 Urban 1 Phone Mens E-Mail 0 0 0.0 2
63983 2 1) $0 - $100 83.03 0 1 Urban 0 Phone No E-Mail 0 0 0.0 5
63984 2 3) $200 - $350 209.51 0 1 Urban 1 Web Womens E-Mail 0 0 0.0 1
63987 1 1) $0 - $100 79.70 1 0 Surburban 1 Web No E-Mail 0 0 0.0 6
63988 6 1) $0 - $100 32.98 1 0 Surburban 0 Web Mens E-Mail 0 0 0.0 2
63990 6 1) $0 - $100 80.02 0 1 Surburban 0 Phone No E-Mail 0 0 0.0 5
63991 1 3) $200 - $350 306.10 1 0 Surburban 1 Phone Womens E-Mail 0 0 0.0 4
63992 1 5) $500 - $750 519.69 1 1 Urban 1 Phone Mens E-Mail 0 0 0.0 5
63993 4 4) $350 - $500 374.07 0 1 Surburban 0 Phone Womens E-Mail 0 0 0.0 1
63996 5 1) $0 - $100 38.91 0 1 Urban 1 Phone Mens E-Mail 0 0 0.0 3
63997 6 1) $0 - $100 29.99 1 0 Urban 1 Phone Mens E-Mail 0 0 0.0 2
63998 1 5) $500 - $750 552.94 1 0 Surburban 1 Multichannel Womens E-Mail 0 0 0.0 4
63999 1 4) $350 - $500 472.82 0 1 Surburban 0 Web Mens E-Mail 0 0 0.0 3

36585 rows × 13 columns


In [23]:
# Combine two masks with the element-wise `&`. Each comparison needs its own
# parentheses because `&` binds tighter than `<` and `==`.
# The output reports 17551 matching rows.
df[ (df.recency < 7) & (df.newbie == 0) ]


Out[23]:
recency history_segment history mens womens zip_code newbie channel segment visit conversion spend DM_category
4 2 1) $0 - $100 45.34 1 0 Urban 0 Web Womens E-Mail 0 0 0.0 4
5 6 2) $100 - $200 134.83 0 1 Surburban 0 Phone Womens E-Mail 1 0 0.0 1
13 2 2) $100 - $200 101.64 0 1 Urban 0 Web Mens E-Mail 1 0 0.0 3
16 5 1) $0 - $100 29.99 1 0 Surburban 0 Phone Mens E-Mail 0 0 0.0 2
22 2 2) $100 - $200 118.40 1 0 Surburban 0 Web Mens E-Mail 1 0 0.0 2
24 4 1) $0 - $100 78.24 1 0 Surburban 0 Web No E-Mail 0 0 0.0 6
27 6 2) $100 - $200 162.98 0 1 Surburban 0 Web Mens E-Mail 0 0 0.0 3
29 2 3) $200 - $350 203.35 1 0 Rural 0 Web No E-Mail 0 0 0.0 6
30 2 3) $200 - $350 237.53 0 1 Surburban 0 Phone Womens E-Mail 0 0 0.0 1
32 6 2) $100 - $200 128.01 0 1 Urban 0 Web Mens E-Mail 0 0 0.0 3
34 3 1) $0 - $100 29.99 1 0 Rural 0 Web Womens E-Mail 0 0 0.0 4
35 4 3) $200 - $350 218.72 0 1 Urban 0 Multichannel Womens E-Mail 0 0 0.0 1
41 3 1) $0 - $100 99.23 1 0 Rural 0 Web Mens E-Mail 1 0 0.0 2
43 2 4) $350 - $500 492.02 1 0 Surburban 0 Phone No E-Mail 0 0 0.0 6
44 1 1) $0 - $100 48.32 0 1 Urban 0 Web No E-Mail 0 0 0.0 5
46 2 4) $350 - $500 391.33 1 0 Surburban 0 Web No E-Mail 0 0 0.0 6
50 3 3) $200 - $350 203.30 0 1 Surburban 0 Web No E-Mail 0 0 0.0 5
55 6 1) $0 - $100 42.66 1 0 Surburban 0 Web No E-Mail 0 0 0.0 6
59 3 2) $100 - $200 143.93 0 1 Surburban 0 Phone Womens E-Mail 0 0 0.0 1
61 2 1) $0 - $100 96.91 1 0 Surburban 0 Phone Womens E-Mail 0 0 0.0 4
65 5 3) $200 - $350 222.07 0 1 Surburban 0 Phone Womens E-Mail 0 0 0.0 1
70 2 3) $200 - $350 278.80 1 0 Rural 0 Web Mens E-Mail 0 0 0.0 2
72 2 4) $350 - $500 428.74 1 0 Rural 0 Phone Mens E-Mail 0 0 0.0 2
76 4 2) $100 - $200 194.11 1 0 Urban 0 Phone No E-Mail 0 0 0.0 6
79 2 1) $0 - $100 29.99 0 1 Surburban 0 Phone Mens E-Mail 0 0 0.0 3
81 2 1) $0 - $100 95.33 0 1 Surburban 0 Web No E-Mail 1 0 0.0 5
86 2 1) $0 - $100 82.59 1 0 Surburban 0 Phone Womens E-Mail 0 0 0.0 4
87 6 2) $100 - $200 165.77 0 1 Urban 0 Web Womens E-Mail 1 0 0.0 1
95 3 2) $100 - $200 133.51 1 0 Urban 0 Web Mens E-Mail 0 0 0.0 2
97 1 1) $0 - $100 47.44 0 1 Urban 0 Phone Womens E-Mail 0 0 0.0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ...
63890 2 3) $200 - $350 229.79 1 1 Urban 0 Phone Mens E-Mail 1 0 0.0 5
63891 4 1) $0 - $100 65.23 0 1 Urban 0 Phone Womens E-Mail 0 0 0.0 1
63893 2 3) $200 - $350 288.30 1 1 Surburban 0 Phone Womens E-Mail 0 0 0.0 5
63901 4 2) $100 - $200 158.91 1 0 Surburban 0 Web No E-Mail 0 0 0.0 6
63902 3 3) $200 - $350 309.39 1 1 Urban 0 Phone Womens E-Mail 0 0 0.0 5
63904 1 1) $0 - $100 29.99 0 1 Surburban 0 Web Womens E-Mail 1 0 0.0 1
63906 4 3) $200 - $350 311.63 0 1 Urban 0 Multichannel No E-Mail 0 0 0.0 5
63910 6 1) $0 - $100 29.99 1 0 Surburban 0 Phone Mens E-Mail 0 0 0.0 2
63911 5 4) $350 - $500 373.13 0 1 Urban 0 Phone Womens E-Mail 0 0 0.0 1
63915 4 3) $200 - $350 300.90 1 0 Surburban 0 Phone No E-Mail 0 0 0.0 6
63916 6 1) $0 - $100 34.50 1 0 Urban 0 Phone Womens E-Mail 0 0 0.0 4
63918 3 1) $0 - $100 29.99 1 0 Surburban 0 Web No E-Mail 0 0 0.0 6
63919 3 2) $100 - $200 149.96 1 0 Urban 0 Web Womens E-Mail 0 0 0.0 4
63924 4 2) $100 - $200 101.40 0 1 Urban 0 Web No E-Mail 0 0 0.0 5
63931 2 1) $0 - $100 29.99 1 0 Surburban 0 Web No E-Mail 0 0 0.0 6
63932 1 4) $350 - $500 426.36 1 1 Surburban 0 Multichannel Mens E-Mail 1 0 0.0 5
63935 4 4) $350 - $500 353.47 1 1 Urban 0 Multichannel Mens E-Mail 0 0 0.0 5
63941 2 2) $100 - $200 130.96 1 0 Urban 0 Phone Mens E-Mail 0 0 0.0 2
63949 5 1) $0 - $100 86.79 1 0 Rural 0 Phone No E-Mail 0 0 0.0 6
63953 5 2) $100 - $200 166.24 0 1 Urban 0 Phone No E-Mail 0 0 0.0 5
63954 2 1) $0 - $100 93.97 1 0 Urban 0 Web No E-Mail 1 0 0.0 6
63955 1 1) $0 - $100 29.99 1 0 Surburban 0 Phone Mens E-Mail 0 0 0.0 2
63961 4 3) $200 - $350 337.36 1 0 Urban 0 Web Mens E-Mail 1 0 0.0 2
63966 4 2) $100 - $200 170.03 1 0 Surburban 0 Web Womens E-Mail 0 0 0.0 4
63969 3 1) $0 - $100 67.78 0 1 Surburban 0 Web Womens E-Mail 0 0 0.0 1
63983 2 1) $0 - $100 83.03 0 1 Urban 0 Phone No E-Mail 0 0 0.0 5
63988 6 1) $0 - $100 32.98 1 0 Surburban 0 Web Mens E-Mail 0 0 0.0 2
63990 6 1) $0 - $100 80.02 0 1 Surburban 0 Phone No E-Mail 0 0 0.0 5
63993 4 4) $350 - $500 374.07 0 1 Surburban 0 Phone Womens E-Mail 0 0 0.0 1
63999 1 4) $350 - $500 472.82 0 1 Surburban 0 Web Mens E-Mail 0 0 0.0 3

17551 rows × 13 columns

Preparing Data


In [35]:
# Satisfaction levels listed from most negative to most positive. The
# position of a label in this list is the integer code it receives when the
# column is encoded as an ordered categorical.
ordered_satisfaction = [
  'Very Unhappy',
  'Unhappy',
  'Neutral',
  'Happy',
  'Very Happy',
]

In [40]:
# Encode an ordinal feature as integer codes that respect a chosen order.
#
# The original cell called `astype("category", ordered=True, categories=...)`.
# Current pandas no longer accepts those extra keyword arguments on `astype`
# and raises TypeError. Building a `pd.Categorical` with explicit
# `categories` and `ordered=True` gives the same int8 codes and also works on
# older pandas releases.
df = pd.DataFrame({'satisfaction':['Mad', 'Happy', 'Unhappy', 'Neutral']})
df.satisfaction = pd.Categorical(
  df.satisfaction,
  categories=ordered_satisfaction,
  ordered=True,
).codes
# A value that is not one of the listed categories ('Mad' here) is encoded
# as -1, as the output below shows.
df.satisfaction


Out[40]:
0   -1
1    3
2    1
3    2
Name: satisfaction, dtype: int8

In [33]:
# Toy frame with a single column of string class labels (some repeated).
df = pd.DataFrame(
  data={
    'vertebrates': ['Bird', 'Bird', 'Mammal', 'Fish', 'Amphibian', 'Reptile', 'Mammal'],
  },
)

In [34]:
# Add an integer code per label. No category list is supplied, so the
# categories are inferred from the values present in the column.
df['vertebrates cata'] = df['vertebrates'].astype("category").cat.codes
# Show the distinct labels in order of first appearance.
df['vertebrates'].unique()


Out[34]:
array(['Bird', 'Mammal', 'Fish', 'Amphibian', 'Reptile'], dtype=object)

In [49]:
# One-hot encode 'vertebrates': the column is replaced by one indicator
# column per distinct label, named `vertebrates_<label>`.
# NOTE(review): this rebinds `df`, so re-running the cell fails because the
# 'vertebrates' column no longer exists. Re-run the cells that build `df` first.
df = pd.get_dummies(data=df, columns=['vertebrates'])
df


Out[49]:
vertebrates cata vertebrates_Amphibian vertebrates_Bird vertebrates_Fish vertebrates_Mammal vertebrates_Reptile
0 1 0 1 0 0 0
1 1 0 1 0 0 0
2 3 0 0 0 1 0
3 2 0 0 1 0 0
4 0 1 0 0 0 0
5 4 0 0 0 0 1
6 3 0 0 0 1 0

Pure Textual Features


In [50]:
from sklearn.feature_extraction.text import CountVectorizer

In [51]:
# Two short example documents used to demonstrate bag-of-words counting.
corpus = []
corpus.append("Authman ran faster than Harry because he is an athlete.")
corpus.append("Authman and Harry ran faster and faster.")

In [54]:
# Build a bag-of-words model with default settings, then learn the
# vocabulary from `corpus` and count term occurrences per document in one
# step. `X` holds the resulting document-term counts.
bow = CountVectorizer()
X = bow.fit_transform(raw_documents=corpus)

In [55]:
# List the learned vocabulary, one entry per column of `X`.
# The original cell called `get_feature_names()`, which was deprecated in
# scikit-learn 1.0 and removed in 1.2. `get_feature_names_out()` is the
# replacement; it returns a NumPy array, so `.tolist()` keeps the plain-list
# display the original cell produced.
bow.get_feature_names_out().tolist()


Out[55]:
['an',
 'and',
 'athlete',
 'authman',
 'because',
 'faster',
 'harry',
 'he',
 'is',
 'ran',
 'than']

In [56]:
# Convert the document-term counts to a dense NumPy array for display: one
# row per document in `corpus`, one column per vocabulary term. For example,
# the second row has 2 in the 'and' and 'faster' columns.
X.toarray()


Out[56]:
array([[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 2, 0, 1, 0, 2, 1, 0, 0, 1, 0]], dtype=int64)

In [ ]: